Palo Alto¶
In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from math import sqrt
import warnings
warnings.filterwarnings("ignore")
data = pd.read_csv('EVChargingStationUsage.csv')
In [2]:
#occurrences of each unique station
station_counts = data['Station Name'].value_counts()
plt.figure(figsize=(15, 10))
station_counts.plot(kind='barh', color='skyblue')
plt.title('Usage Count of Individual Stations')
plt.xlabel('Usage Count')
plt.ylabel('Station Name')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
In [3]:
#function to extract the entity based on the first word following "PALO ALTO CA /"
def extract_entity(station_name):
parts = station_name.split('/')
if len(parts) > 1:
# Take the part after "PALO ALTO CA /" and then take the first word
return parts[1].strip().split(' ')[0]
else:
# If the station name does not follow the expected format, return the original
return station_name
#function to create the 'Entity' column
data['Entity'] = data['Station Name'].apply(extract_entity)
#occurrences of each entity
entity_counts = data['Entity'].value_counts()
#plotting the usage count of each entity
plt.figure(figsize=(12, 6))
entity_counts.plot(kind='bar', color='skyblue')
plt.title('Usage Count of Each Station Entity')
plt.xlabel('Entity')
plt.ylabel('Usage Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Function to create lagged features
def buildLaggedFeatures(s, lag=30, dropna=True):
df = pd.concat([s.shift(i) for i in range(lag + 1)], axis=1)
df.columns = ['lag_{}'.format(i) if i != 0 else s.name for i in range(lag + 1)]
if dropna:
df = df.dropna()
return df
# Normalize station names
def normalize_station_name(name):
name = name.upper()
if "RINCONADA" in name:
parts = name.split()
new_parts = []
for part in parts:
if part.startswith('LIB') and len(parts) > parts.index(part) + 1 and parts[parts.index(part) + 1].isdigit():
digit = parts.pop(parts.index(part) + 1)
new_parts.append(part + ' #' + digit)
else:
new_parts.append(part)
modified_name = ' '.join(new_parts)
if 'RINCONADA LIB #' in modified_name:
return 'RINCONADA LIB'
return modified_name
if "SHERMAN" in name:
return None
return name.split('#')[0].strip()
# Load data
data = pd.read_csv('EVChargingStationUsage.csv')
data['Normalized Station Name'] = data['Station Name'].apply(normalize_station_name)
data = data.dropna(subset=['Normalized Station Name'])
stations = data['Normalized Station Name'].unique()
# Set up the plot grid
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(20, 15))
axes = axes.flatten()
fig.subplots_adjust(hspace=0.5, wspace=0.3)
# Processing each normalized station group
for idx, station in enumerate(stations):
if idx >= 9:
break
station_data = data[data['Normalized Station Name'] == station]
station_data['Start DateTime'] = pd.to_datetime(station_data['Start Date'] + ' ' + station_data['Start Time Zone'], errors='coerce')
station_data.dropna(subset=['Start DateTime'], inplace=True)
daily_energy = station_data.groupby(station_data['Start DateTime'].dt.floor('D'))['Energy (kWh)'].sum()
lagged_features = buildLaggedFeatures(daily_energy, lag=30)
train_data = lagged_features.iloc[-(150):-30]
test_data = lagged_features.iloc[-30:]
X_train = train_data.drop(columns=['Energy (kWh)'])
y_train = train_data['Energy (kWh)']
X_test = test_data.drop(columns=['Energy (kWh)'])
y_test = test_data['Energy (kWh)']
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred = rf_regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE for {station} Stations: {rmse}')
ax = axes[idx]
ax.plot(y_test.index, y_test, label='Actual', marker='o')
ax.plot(y_test.index, y_pred, label='Forecasted', marker='x')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=2)) # Set interval to 2 for every second day
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right")
ax.set_title(f'Energy Consumption for {station}')
ax.legend()
# Hide any unused axes if there are less than 9 stations
for ax in axes[len(stations):]:
ax.set_visible(False)
plt.show()
RMSE for PALO ALTO CA / HAMILTON Stations: 32.61363452617447 RMSE for PALO ALTO CA / HIGH Stations: 33.610553449706984 RMSE for PALO ALTO CA / BRYANT Stations: 34.20994912491419 RMSE for PALO ALTO CA / MPL Stations: 34.07715427229769 RMSE for RINCONADA LIB Stations: 43.85210518068926 RMSE for PALO ALTO CA / WEBSTER Stations: 43.06912468664212 RMSE for PALO ALTO CA / TED THOMPSON Stations: 37.50685374748391 RMSE for PALO ALTO CA / CAMBRIDGE Stations: 51.12335097196068
Palo Alto - Offset¶
In [5]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Function to create lagged features
def buildLaggedFeatures_off(s, lag=30, offset=0, dropna=True):
# Create a DataFrame with shifted data
df = pd.concat([s.shift(i + offset) for i in range(lag + 1)], axis=1)
# Adjust column names to reflect the lags, considering the offset
df.columns = ['lag_{}'.format(i + offset) if i != 0 else s.name for i in range(lag + 1)]
# Drop rows with missing values if requested
if dropna:
df = df.dropna()
return df
# Normalize station names
def normalize_station_name(name):
name = name.upper()
if "RINCONADA" in name:
parts = name.split()
new_parts = []
for part in parts:
if part.startswith('LIB') and len(parts) > parts.index(part) + 1 and parts[parts.index(part) + 1].isdigit():
digit = parts.pop(parts.index(part) + 1)
new_parts.append(part + ' #' + digit)
else:
new_parts.append(part)
modified_name = ' '.join(new_parts)
if 'RINCONADA LIB #' in modified_name:
return 'RINCONADA LIB'
return modified_name
if "SHERMAN" in name:
return None
return name.split('#')[0].strip()
# Load data
data = pd.read_csv('EVChargingStationUsage.csv')
data['Normalized Station Name'] = data['Station Name'].apply(normalize_station_name)
data = data.dropna(subset=['Normalized Station Name'])
stations = data['Normalized Station Name'].unique()
# Set up the plot grid
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(20, 15))
axes = axes.flatten()
fig.subplots_adjust(hspace=0.5, wspace=0.3)
# Processing each normalized station group
for idx, station in enumerate(stations):
if idx >= 9:
break
station_data = data[data['Normalized Station Name'] == station]
station_data['Start DateTime'] = pd.to_datetime(station_data['Start Date'] + ' ' + station_data['Start Time Zone'], errors='coerce')
station_data.dropna(subset=['Start DateTime'], inplace=True)
daily_energy = station_data.groupby(station_data['Start DateTime'].dt.floor('D'))['Energy (kWh)'].sum()
lagged_features = buildLaggedFeatures_off(daily_energy, lag=30, offset=7)
train_data = lagged_features.iloc[:-30]
# train_data = lagged_features.iloc[-(150):-30] #error changes based on size of training by rmse=8 cca
test_data = lagged_features.iloc[-30:]
X_train = train_data.drop(columns=['Energy (kWh)'])
y_train = train_data['Energy (kWh)']
X_test = test_data.drop(columns=['Energy (kWh)'])
y_test = test_data['Energy (kWh)']
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred = rf_regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE for {station} Stations: {rmse}')
ax = axes[idx]
ax.plot(y_test.index, y_test, label='Actual', marker='o')
ax.plot(y_test.index, y_pred, label='Forecasted', marker='x')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=2)) # Set interval to 2 for every second day
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right")
ax.set_title(f'Energy Consumption for {station}')
ax.legend()
# Hide any unused axes if there are less than 9 stations
for ax in axes[len(stations):]:
ax.set_visible(False)
plt.show()
RMSE for PALO ALTO CA / HAMILTON Stations: 28.544972824540345 RMSE for PALO ALTO CA / HIGH Stations: 33.06261022482949 RMSE for PALO ALTO CA / BRYANT Stations: 39.798684079619456 RMSE for PALO ALTO CA / MPL Stations: 33.974048618823794 RMSE for RINCONADA LIB Stations: 37.3178372979347 RMSE for PALO ALTO CA / WEBSTER Stations: 40.981397027088974 RMSE for PALO ALTO CA / TED THOMPSON Stations: 27.583233061876633 RMSE for PALO ALTO CA / CAMBRIDGE Stations: 52.92351466619919
Dundee¶
In [6]:
data = pd.read_csv('Dundee_merged (1).csv')
In [7]:
#occurrences of each unique station
station_counts = data['Address 1'].value_counts()
plt.figure(figsize=(15, 10))
station_counts.plot(kind='barh', color='skyblue')
plt.title('Usage Count of Individual Stations')
plt.xlabel('Usage Count')
plt.ylabel('Station Name')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
In [8]:
import pandas as pd
import matplotlib.pyplot as plt
# Assuming data has already been loaded into the 'data' DataFrame
# Filter out 'Trades Lane' and 'Sinclair Street' from the 'Address 1' column
data_filtered = data[~data['Address 1'].isin(['Trades Lane', 'Sinclair Street','Sinclair Street, Dundee'])]
# Get the occurrences of each unique station, now excluding the specified addresses
station_counts = data_filtered['Address 1'].value_counts()
# Plot the usage count of the remaining individual stations
plt.figure(figsize=(15, 10))
station_counts.plot(kind='barh', color='skyblue')
plt.title('Usage Count of Individual Stations')
plt.xlabel('Usage Count')
plt.ylabel('Station Name')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
In [9]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Function to create lagged features
def buildLaggedFeatures(s, lag=30, dropna=True):
df = pd.concat([s.shift(i) for i in range(lag + 1)], axis=1)
df.columns = ['lag_{}'.format(i) if i != 0 else s.name for i in range(lag + 1)]
if dropna:
df = df.dropna()
return df
# Load data
data = pd.read_csv('Dundee_merged (1).csv') # Ensure the path is correct
# Filter out specific stations
data_filtered = data[~data['Address 1'].isin(['Trades Lane', 'Sinclair Street', 'Sinclair Street, Dundee'])]
# Convert 'Start Date' to datetime
data_filtered['Start DateTime'] = pd.to_datetime(data_filtered['Start Date'], errors='coerce')
data_filtered.dropna(subset=['Start DateTime', 'Energy(kWh)'], inplace=True)
# Get unique stations after filtering
unique_stations = data_filtered['Address 1'].unique()
# Prepare to dynamically create subplots based on data availability
valid_data_stations = [] # To store stations with enough data
# Loop through each station and check data
for station in unique_stations:
station_data = data_filtered[data_filtered['Address 1'] == station]
daily_energy = station_data.groupby(station_data['Start DateTime'].dt.floor('D'))['Energy(kWh)'].sum()
lagged_features = buildLaggedFeatures(daily_energy, lag=30)
if lagged_features.shape[0] > 30: # Ensuring enough data for split
valid_data_stations.append(station)
# Number of plots
n_cols = 5
n_rows = (len(valid_data_stations) + n_cols - 1) // n_cols
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(n_cols * 5, n_rows * 4)) # Slightly increased size
axes = axes.flatten()
# Loop again to model and plot only where data is sufficient
for idx, station in enumerate(valid_data_stations):
ax = axes[idx]
station_data = data_filtered[data_filtered['Address 1'] == station]
daily_energy = station_data.groupby(station_data['Start DateTime'].dt.floor('D'))['Energy(kWh)'].sum()
lagged_features = buildLaggedFeatures(daily_energy, lag=30)
train_data = lagged_features.iloc[-(150):-30]
test_data = lagged_features.iloc[-30:]
X_train = train_data.drop(columns=['Energy(kWh)'])
y_train = train_data['Energy(kWh)']
X_test = test_data.drop(columns=['Energy(kWh)'])
y_test = test_data['Energy(kWh)']
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred = rf_regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE for {station} Stations: {rmse}')
# Set custom date ticks
dates = y_test.index.tolist()
ticks_to_use = [dates[0], dates[len(dates)//3], dates[2*len(dates)//3], dates[-1]]
ax.set_xticks(ticks_to_use)
ax.plot(y_test.index, y_test, label='Actual', marker='o')
ax.plot(y_test.index, y_pred, label='Forecasted', marker='x')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.set_title(station, fontsize=10)
ax.legend()
# Hide any unused axes
for j in range(len(valid_data_stations), len(axes)):
axes[j].axis('off')
fig.tight_layout(pad=4.0) # Adjusted padding for clarity
plt.show()
RMSE for Lochee Charging Hub, Dundee Stations: 125.73072340697016 RMSE for Greenmarket Multi Car Park, Dundee Stations: 41.03446318761495 RMSE for Queen Street Car Park, Broughty Ferry, Dundee Stations: 51.5228894107852 RMSE for Housing Office West, Dundee Stations: 10.076021070343195 RMSE for Nethergate, Dundee Stations: 12.443212563723245 RMSE for Brington Place Sheltered Housing, Dundee Stations: 4.413265415615367 RMSE for Balunie Drive, Dundee Stations: 12.25633817632058 RMSE for Social Work Building, Jack Martin Way, Dundee Stations: 7.5544917183593485 RMSE for Dundee Ice Arena, Dundee Stations: 54.43000965362863 RMSE for Mitchell Street, Dundee Stations: 5.743683789085189 RMSE for Oakland Day Centre, Dundee Stations: 7.957195877401366 RMSE for Dock Street, Dundee Stations: 20.95884889737276 RMSE for Whitfield Centre, Dundee Stations: 18.775805519187006 RMSE for Housing Office East, Dundee Stations: 15.81541397932199 RMSE for Gellatly Street Car Park, Dundee Stations: 23.397402274384525 RMSE for Dundee House, Dundee Stations: 263.2790289046673 RMSE for Public Works Dept, Clepington Rd. Dundee Stations: 25.321952059399305 RMSE for Marchbanks, Dundee Stations: 19.11664194555798 RMSE for Olympia Multi-Storey Car Park, Dundee Stations: 13.944673154243047 RMSE for South Tay Street, Dundee Stations: 20.119073641666752 RMSE for Ardler Complex, Dundee Stations: 10.701520355211217 RMSE for Menziehill House, Dundee Stations: 11.746132847083475 RMSE for Turriff House Rannoch Road, Dundee Stations: 9.516726429292797 RMSE for Trades Lane, Dundee Stations: 12.262027054013133 RMSE for University of Dundee, Nethergate, Dundee Stations: 12.498735394878022 RMSE for Janet Brougham House, Dundee Stations: 8.38937608866118 RMSE for South Tay Street Stations: 18.178915328626093 RMSE for Earn Cresent, Dundee Stations: 8.808114916049478 RMSE for DCC Environment, 34 Harefield Road Stations: 15.054936091461833
Dundee - Offset¶
In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Function to create lagged features
def buildLaggedFeatures_off(s, lag=30, offset=0, dropna=True):
# Create a DataFrame with shifted data
df = pd.concat([s.shift(i + offset) for i in range(lag + 1)], axis=1)
# Adjust column names to reflect the lags, considering the offset
df.columns = ['lag_{}'.format(i + offset) if i != 0 else s.name for i in range(lag + 1)]
# Drop rows with missing values if requested
if dropna:
df = df.dropna()
return df
# Load data
data = pd.read_csv('Dundee_merged (1).csv') # Ensure the path is correct
# Filter out specific stations
data_filtered = data[~data['Address 1'].isin(['Trades Lane', 'Sinclair Street', 'Sinclair Street, Dundee'])]
# Convert 'Start Date' to datetime
data_filtered['Start DateTime'] = pd.to_datetime(data_filtered['Start Date'], errors='coerce')
data_filtered.dropna(subset=['Start DateTime', 'Energy(kWh)'], inplace=True)
# Get unique stations after filtering
unique_stations = data_filtered['Address 1'].unique()
# Prepare to dynamically create subplots based on data availability
valid_data_stations = [] # To store stations with enough data
# Loop through each station and check data
for station in unique_stations:
station_data = data_filtered[data_filtered['Address 1'] == station]
daily_energy = station_data.groupby(station_data['Start DateTime'].dt.floor('D'))['Energy(kWh)'].sum()
lagged_features = buildLaggedFeatures_off(daily_energy, lag=30,offset=7)
if lagged_features.shape[0] > 30: # Ensuring enough data for split
valid_data_stations.append(station)
# Number of plots
n_cols = 5
n_rows = (len(valid_data_stations) + n_cols - 1) // n_cols
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(n_cols * 5, n_rows * 4)) # Slightly increased size
axes = axes.flatten()
# Loop again to model and plot only where data is sufficient
for idx, station in enumerate(valid_data_stations):
ax = axes[idx]
station_data = data_filtered[data_filtered['Address 1'] == station]
daily_energy = station_data.groupby(station_data['Start DateTime'].dt.floor('D'))['Energy(kWh)'].sum()
lagged_features = buildLaggedFeatures_off(daily_energy, lag=30, offset=7)
train_data = lagged_features.iloc[:-30]
test_data = lagged_features.iloc[-30:]
X_train = train_data.drop(columns=['Energy(kWh)'])
y_train = train_data['Energy(kWh)']
X_test = test_data.drop(columns=['Energy(kWh)'])
y_test = test_data['Energy(kWh)']
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred = rf_regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE for {station} Stations: {rmse}')
# Set custom date ticks
dates = y_test.index.tolist()
ticks_to_use = [dates[0], dates[len(dates)//3], dates[2*len(dates)//3], dates[-1]]
ax.set_xticks(ticks_to_use)
ax.plot(y_test.index, y_test, label='Actual', marker='o')
ax.plot(y_test.index, y_pred, label='Forecasted', marker='x')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.set_title(station, fontsize=10)
ax.legend()
# Hide any unused axes
for j in range(len(valid_data_stations), len(axes)):
axes[j].axis('off')
fig.tight_layout(pad=4.0) # Adjusted padding for clarity
plt.show()
RMSE for Lochee Charging Hub, Dundee Stations: 125.37137497879347 RMSE for Greenmarket Multi Car Park, Dundee Stations: 41.834306202468724 RMSE for Queen Street Car Park, Broughty Ferry, Dundee Stations: 45.201711295329915 RMSE for Housing Office West, Dundee Stations: 8.974539660023238 RMSE for Nethergate, Dundee Stations: 11.976774163911305 RMSE for Brington Place Sheltered Housing, Dundee Stations: 4.423448317960397 RMSE for Balunie Drive, Dundee Stations: 11.778013482105825 RMSE for Social Work Building, Jack Martin Way, Dundee Stations: 7.627394181354032 RMSE for Dundee Ice Arena, Dundee Stations: 53.10681582086938 RMSE for Mitchell Street, Dundee Stations: 5.162411034164818 RMSE for Oakland Day Centre, Dundee Stations: 8.674263053232051 RMSE for Dock Street, Dundee Stations: 20.425357605117792 RMSE for Whitfield Centre, Dundee Stations: 20.417607196486074 RMSE for Housing Office East, Dundee Stations: 15.751217556313966 RMSE for Gellatly Street Car Park, Dundee Stations: 25.90312299511663 RMSE for Dundee House, Dundee Stations: 262.17560758434223 RMSE for Public Works Dept, Clepington Rd. Dundee Stations: 20.52536201045445 RMSE for Marchbanks, Dundee Stations: 17.4355007383499 RMSE for Olympia Multi-Storey Car Park, Dundee Stations: 12.767447010750951 RMSE for South Tay Street, Dundee Stations: 19.786499235523195 RMSE for Ardler Complex, Dundee Stations: 10.950630858828793 RMSE for Menziehill House, Dundee Stations: 11.715689098711467 RMSE for Turriff House Rannoch Road, Dundee Stations: 7.613027997058728 RMSE for Trades Lane, Dundee Stations: 10.000442342393494 RMSE for University of Dundee, Nethergate, Dundee Stations: 12.66127764196015 RMSE for Janet Brougham House, Dundee Stations: 8.403209720735685 RMSE for South Tay Street Stations: 18.736883805267972 RMSE for Earn Cresent, Dundee Stations: 9.479696697539078 RMSE for DCC Environment, 34 Harefield Road Stations: 15.234425523804084
Perth¶
In [11]:
data = pd.read_csv('Perth&Kinross_merged1.csv')
In [12]:
#occurrences of each unique station
station_counts = data['Adress 1'].value_counts()
plt.figure(figsize=(15, 10))
station_counts.plot(kind='barh', color='skyblue')
plt.title('Usage Count of Individual Stations')
plt.xlabel('Usage Count')
plt.ylabel('Station Name')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
In [13]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Function to create lagged features
def buildLaggedFeatures(s, lag=30, dropna=True):
df = pd.concat([s.shift(i) for i in range(lag + 1)], axis=1)
df.columns = ['lag_{}'.format(i) if i != 0 else s.name for i in range(lag + 1)]
if dropna:
df = df.dropna()
return df
# Load the new dataset
data = pd.read_csv('Perth&Kinross_merged1.csv') # Update the path to your dataset
# Convert 'Start Date' to datetime, assuming 'Start Date' is the column name
data['Start DateTime'] = pd.to_datetime(data['Start Date'], errors='coerce')
data.dropna(subset=['Start DateTime', 'Energy(kWh)'], inplace=True)
# Get unique stations after filtering
unique_stations = data['Adress 1'].unique()
# Number of plots
n_cols = 5
n_rows = (len(unique_stations) + n_cols - 1) // n_cols
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(n_cols * 5, n_rows * 4))
axes = axes.flatten()
# Loop through each station and plot
for idx, station in enumerate(unique_stations):
ax = axes[idx]
station_data = data[data['Adress 1'] == station]
daily_energy = station_data.groupby(station_data['Start DateTime'].dt.floor('D'))['Energy(kWh)'].sum()
lagged_features = buildLaggedFeatures(daily_energy, lag=30)
if len(lagged_features) > 30:
train_data = lagged_features.iloc[:-30]
test_data = lagged_features.iloc[-30:]
X_train = train_data.drop('Energy(kWh)', axis=1)
y_train = train_data['Energy(kWh)']
X_test = test_data.drop('Energy(kWh)', axis=1)
y_test = test_data['Energy(kWh)']
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor.fit(X_train, y_train)
y_pred = rf_regressor.predict(X_test)
ax.plot(y_test.index, y_test, label='Actual', marker='o')
ax.plot(y_test.index, y_pred, label='Forecasted', marker='x')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
else:
# Display message if not enough data but keep the plot clean
ax.text(0.5, 0.5, 'Not enough data', horizontalalignment='center', verticalalignment='center', fontsize=12, transform=ax.transAxes)
ax.set_title(station, fontsize=10)
ax.legend()
# Hide any unused axes if there are less than planned
for j in range(idx + 1, len(axes)):
axes[j].axis('off')
fig.tight_layout(pad=4.0) # Adjusted padding for clarity
plt.show()
No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument. No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.